%matplotlib notebook
#%matplotlib inline
import pandas as pd
import numpy as np
from collections import Counter
from src.data.load_data import (load_iris, load_wine,
load_diabetes, load_glass,
load_pima_diabetes)
import seaborn as sbn
from matplotlib import pyplot as plt
# --- Iris: pairwise overview, summary statistics and per-feature distributions ---
iris = load_iris()
print(iris.columns)
sbn.pairplot(iris[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Class']], hue='Class')

# describe() table extended with one extra row, 'nunique', holding the number
# of distinct values per column.
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# built from the named Series yields the same table on old and new pandas.
iris_stats = iris.describe()
iris_nunique = iris.nunique().rename('nunique').astype(int)
iris_stats = pd.concat([iris_stats, iris_nunique.to_frame().T])
iris_stats  # notebook cell output
sum(iris_stats.loc['nunique'])  # notebook cell output

# One histogram per feature, using as many bins as the feature has distinct values.
# ax.flat walks the 2x2 grid row by row: [0][0], [0][1], [1][0], [1][1].
fig, ax = plt.subplots(2, 2, figsize=(18, 10))
col = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
for axis, name in zip(ax.flat, col):
    axis.hist(iris[name], bins=int(iris_stats.loc['nunique', name]))
    axis.set_xlabel(name)
plt.show()

# Same four features as distribution plots with a fixed 50 bins.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot; kept here so the rendered figures do not change.
fig, ax = plt.subplots(2, 2, figsize=(18, 10))
for axis, name in zip(ax.flat, col):
    sbn.distplot(iris[name], bins=50, ax=axis)
plt.show()
# --- Wine: pairwise overview, summary statistics and per-column distributions ---
wine = load_wine()
print(wine.columns)
sbn.pairplot(wine[['Alcohol', 'MalicAcid', 'Ash', 'AlcalinityOfAsh', 'Magnesium',
                   'TotalPhenols', 'Flavanoids', 'NonflavanoidPhenols', 'Proanthocyanins',
                   'ColorIntensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline', 'Class']],
             hue='Class')

# describe() table extended with one extra row, 'nunique', holding the number
# of distinct values per column.
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# built from the named Series yields the same table on old and new pandas.
wine_stats = wine.describe()
wine_nunique = wine.nunique().rename('nunique').astype(int)
wine_stats = pd.concat([wine_stats, wine_nunique.to_frame().T])
wine_stats  # notebook cell output
sum(wine_stats.loc['nunique'])  # notebook cell output

# One histogram per column (stacked vertically), with as many bins as the
# column has distinct values.
col = wine.columns
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    axis.hist(wine[name], bins=int(wine_stats.loc['nunique', name]))
    axis.set_xlabel(name)
plt.show()

# Same columns and bin counts as distribution plots.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot; kept here so the rendered figures do not change.
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    sbn.distplot(wine[name], bins=int(wine_stats.loc['nunique', name]), ax=axis)
plt.show()
# --- Glass: pairwise overview, summary statistics and per-column distributions ---
glass = load_glass()
print(glass.columns)
sbn.pairplot(glass[['RefractiveIndex', 'Sodium', 'Magnesium', 'Aluminum', 'Silicon',
                    'Potassium', 'Calcium', 'Barium', 'Iron', 'Class']],
             hue='Class')

# describe() table extended with one extra row, 'nunique', holding the number
# of distinct values per column.
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# built from the named Series yields the same table on old and new pandas.
glass_stats = glass.describe()
glass_nunique = glass.nunique().rename('nunique').astype(int)
glass_stats = pd.concat([glass_stats, glass_nunique.to_frame().T])
glass_stats  # notebook cell output
sum(glass_stats.loc['nunique'])  # notebook cell output

# One histogram per column (stacked vertically), with as many bins as the
# column has distinct values.
col = glass.columns
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    axis.hist(glass[name], bins=int(glass_stats.loc['nunique', name]))
    axis.set_xlabel(name)
plt.show()

# Same columns and bin counts as distribution plots.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot; kept here so the rendered figures do not change.
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    sbn.distplot(glass[name], bins=int(glass_stats.loc['nunique', name]), ax=axis)
plt.show()
# --- Diabetes: raw table, pairwise overview, summary statistics, distributions ---
diabetes = load_diabetes()
print(diabetes.columns)
print(diabetes.dtypes)
diabetes  # notebook cell output
# No hue is passed here, unlike the pairplots of the other datasets.
sbn.pairplot(diabetes[['Date', 'Time', 'Code', 'Value', 'DateTime']])

# describe() restricted to integer, float and datetime columns, extended with
# one extra row, 'nunique', holding the number of distinct values per column.
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# built from the named Series yields the same table on old and new pandas.
diabetes_stats = diabetes.describe(include=[np.int64, np.float64, np.datetime64])
diabetes_nunique = diabetes.nunique().rename('nunique').astype(int)
diabetes_stats = pd.concat([diabetes_stats, diabetes_nunique.to_frame().T])
diabetes_stats  # notebook cell output
sum(diabetes_stats.loc['nunique'])  # notebook cell output

# Only 'Code' and 'Value' are plotted; 'DateTime' is left out.
col = ['Code', 'Value']
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    axis.hist(diabetes[name], bins=int(diabetes_stats.loc['nunique', name]))
    axis.set_xlabel(name)
plt.show()

# Same columns and bin counts as distribution plots.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot; kept here so the rendered figures do not change.
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    sbn.distplot(diabetes[name], bins=int(diabetes_stats.loc['nunique', name]), ax=axis)
plt.show()
# --- Pima diabetes: pairwise overview, summary statistics, per-column distributions ---
pima = load_pima_diabetes()
print(pima.columns)
sbn.pairplot(pima[['NbPregnancies', 'PlasmaGlucoseConcentration', 'DiastolicBloodPressure',
                   'TricepsSkinFoldThickness', 'TwoHourSerumInsulin', 'BMI',
                   'DiabetesPedigreeFunction', 'Age', 'Class']],
             hue='Class')

# describe() table extended with one extra row, 'nunique', holding the number
# of distinct values per column.
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# built from the named Series yields the same table on old and new pandas.
pima_stats = pima.describe()
pima_nunique = pima.nunique().rename('nunique').astype(int)
pima_stats = pd.concat([pima_stats, pima_nunique.to_frame().T])
pima_stats  # notebook cell output
sum(pima_stats.loc['nunique'])  # notebook cell output

# One histogram per column (stacked vertically), with as many bins as the
# column has distinct values.
col = pima.columns
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    axis.hist(pima[name], bins=int(pima_stats.loc['nunique', name]))
    axis.set_xlabel(name)
plt.show()

# Same columns and bin counts as distribution plots.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot; kept here so the rendered figures do not change.
fig, ax = plt.subplots(len(col), 1, figsize=(15, len(col) * 5))
for axis, name in zip(ax, col):
    sbn.distplot(pima[name], bins=int(pima_stats.loc['nunique', name]), ax=axis)
plt.show()
# --- Pima diabetes: 3-component t-SNE embedding of the feature columns ---
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# NOTE(review): Axes3D is not referenced below; presumably imported so that
# older matplotlib releases register the '3d' projection -- confirm before removing.
from mpl_toolkits.mplot3d import Axes3D

# Every pima column except 'Class' is fed to the embedding.
pima_features = [
    'NbPregnancies', 'PlasmaGlucoseConcentration', 'DiastolicBloodPressure',
    'TricepsSkinFoldThickness', 'TwoHourSerumInsulin', 'BMI',
    'DiabetesPedigreeFunction', 'Age',
]

tsne = TSNE(n_components=3)
g = tsne.fit_transform(pima[pima_features].values)

# 3D scatter of the three embedding components, coloured by the 'Class' column.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(g[:, 0], g[:, 1], g[:, 2], c=pima['Class'].values)